archived/credit_card_fraud_detector/env_setup.py (138 lines of code) (raw):
import argparse
import json
import logging
import subprocess
import sys
from pathlib import Path
from typing import Optional
# Filesystem locations derived from where this script itself lives.
CURRENT_FILE = Path(__file__).resolve()
CURRENT_FOLDER = CURRENT_FILE.parent
# JSON logbook kept next to this script; it records completed setups.
LOGBOOK_FILE = CURRENT_FOLDER / 'env_setup_logbook.json'
# Per-mode paths, keyed by the SagemakerMode values ('NotebookInstance', 'Studio').
ROOT_PATHS = dict(NotebookInstance=Path('/home/ec2-user'), Studio=Path('/root'))
BIN_PATHS = dict(NotebookInstance=Path('/usr/bin'), Studio=Path('/opt/conda/bin'))
# Common setup
def get_sagemaker_mode() -> str:
    """Read the SageMaker mode from ``stack_outputs.json`` beside this script.

    Returns:
        The value stored under the ``SagemakerMode`` key, which is either
        ``'Studio'`` or ``'NotebookInstance'``.

    Raises:
        ValueError: If the stored value is neither of the two known modes.
    """
    outputs_path = Path(CURRENT_FOLDER, 'stack_outputs.json')
    with open(outputs_path) as handle:
        mode = json.load(handle)['SagemakerMode']
    if mode in {'Studio', 'NotebookInstance'}:
        return mode
    raise ValueError('SagemakerMode should be Studio or NotebookInstance. Check stack_outputs.json.')
def get_executable() -> str:
    """Return ``sys.executable``, the path of the running Python interpreter."""
    interpreter = sys.executable
    return interpreter
def get_hostname(hostname_file: Path = Path('/etc/hostname')) -> Optional[str]:
    """Return the hostname recorded in a single-line hostname file.

    Args:
        hostname_file: File expected to hold the hostname on exactly one
            line. Defaults to ``/etc/hostname``.

    Returns:
        The hostname with surrounding whitespace stripped, or ``None``
        (after logging a warning) when the file does not exist.

    Raises:
        ValueError: If the file does not contain exactly one line.
    """
    hostname_file = Path(hostname_file)
    if not hostname_file.is_file():
        logging.warning(f'Could not find {hostname_file}. Setting hostname to None.')
        return None
    with open(hostname_file, 'r') as f:
        contents = f.readlines()
    # Validate explicitly instead of with `assert`: asserts are removed under
    # `python -O`, which would let an empty or multi-line file slip through.
    if len(contents) != 1:
        raise ValueError(
            f'Expected exactly one line in {hostname_file}, found {len(contents)}.'
        )
    return contents[0].strip()
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the environment setup script.

    Returns:
        Namespace with ``force`` (``True`` when ``--force`` is given) and
        ``log_level`` (string from ``--log-level``, default ``'INFO'``).
    """
    cli = argparse.ArgumentParser(description='Setup environment for solution.')
    cli.add_argument('--force', action='store_true')
    cli.add_argument('--log-level', default='INFO', type=str)
    return cli.parse_args()
def read_file(file: str) -> str:
    """Open ``file`` in text mode and return its entire content."""
    with open(file, mode='r') as handle:
        text = handle.read()
    return text
def bash(cmd: str) -> subprocess.CompletedProcess:
    """Run a shell script through ``subprocess.run`` with ``set -e`` prepended.

    When the root logger level is above DEBUG the child's stdout/stderr are
    captured; otherwise they are forwarded to this process's own streams.

    Args:
        cmd: Shell script text to execute (run with ``shell=True``).

    Returns:
        The ``CompletedProcess`` of the successful run.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. In capturing mode the captured stderr is logged first.
    """
    capturing = logging.root.level > logging.DEBUG
    if capturing:
        out_stream, err_stream = subprocess.PIPE, subprocess.PIPE
    else:
        out_stream, err_stream = sys.stdout, sys.stderr
    script = "set -e" + '\n' + cmd
    try:
        return subprocess.run(
            script,
            shell=True,
            check=True,
            universal_newlines=True,  # same as text=True but support py3.6 too
            stdout=out_stream,
            stderr=err_stream,
        )
    except subprocess.CalledProcessError as failure:
        # Captured stderr would otherwise be lost; surface it before re-raising.
        if capturing:
            logging.error('\n' + failure.stderr)
        raise failure
def logging_setup(level: str) -> None:
    """Configure root logging to write to stdout at the given level.

    Args:
        level: Level name such as ``'INFO'`` or ``'DEBUG'``; it is translated
            with ``logging.getLevelName`` before being passed on.
    """
    resolved_level = logging.getLevelName(level)
    logging.basicConfig(level=resolved_level, stream=sys.stdout)
def env_setup() -> None:
    """Script entry point: set up the environment for the detected SageMaker mode.

    Studio: setup runs when ``--force`` is given or when the current
    (hostname, executable) pair is not yet in the logbook; on success the
    pair is recorded. NotebookInstance: setup runs only with ``--force``.
    """
    options = parse_args()
    logging_setup(options.log_level)
    mode = get_sagemaker_mode()
    if mode == 'Studio':
        hostname = get_hostname()
        logging.debug(f'hostname: {hostname}')
        executable = get_executable()
        logging.debug(f'executable: {executable}')
        # The logbook is only consulted when --force was not requested.
        already_done = (not options.force) and in_logbook(hostname, executable)
        if already_done:
            logging.info('Skipping. Already setup environment.')
        else:
            env_setup_studio()
            logging.info('Successfully setup environment.')
            add_to_logbook(hostname, executable)
    elif mode == 'NotebookInstance':
        if not options.force:
            logging.info('Skipping. Already setup environment.')
        else:
            env_setup_notebook_instance()
            logging.info('Successfully setup environment.')
def in_logbook(hostname: str, executable: str) -> bool:
    """Tell whether the logbook already lists this hostname/executable pair.

    Args:
        hostname: Hostname to look for.
        executable: Python executable path to look for.

    Returns:
        ``True`` if some logbook entry matches both values; ``False`` if no
        entry matches or the logbook file does not exist.
    """
    if not LOGBOOK_FILE.is_file():
        logging.debug(f'Could not find logbook at {LOGBOOK_FILE}.')
        return False
    with open(LOGBOOK_FILE, 'r') as handle:
        records = json.load(handle)
    matched = any(
        (record['hostname'] == hostname) and (record['executable'] == executable)
        for record in records
    )
    if matched:
        return True
    logging.debug('Could not find a matching entry in logbook.')
    return False
def add_to_logbook(hostname: Optional[str], executable: Optional[str]) -> None:
    """Record a hostname/executable pair in the JSON logbook.

    Nothing is written when either value is ``None`` (a warning is logged
    instead) or when a matching entry is already present.

    Args:
        hostname: Hostname to record; may be ``None``.
        executable: Python executable path to record; may be ``None``.
    """
    if (hostname is None) or (executable is None):
        # `logging.warn` is a deprecated alias; `logging.warning` is the supported call.
        logging.warning('Could not add to logbook because either hostname or executable is empty.')
        return
    if LOGBOOK_FILE.is_file():
        with open(LOGBOOK_FILE, 'r') as f:
            logbook = json.load(f)
    else:
        logbook = []
    already_recorded = any(
        (entry['hostname'] == hostname) and (entry['executable'] == executable)
        for entry in logbook
    )
    if already_recorded:
        return  # don't need to add since already in logbook
    logbook.append({'hostname': hostname, 'executable': executable})
    with open(LOGBOOK_FILE, 'w') as f:
        json.dump(logbook, f)
# Solution specific setup
def env_setup_notebook_instance() -> None:
    """Run the Notebook Instance setup steps, one shell script per step.

    Steps, in order: delete the ``docutils`` directories from the python3.6
    site-packages, uninstall ``enum34``, upgrade ``pyyaml``, then install
    ``notebooks/requirements.txt`` and the ``notebooks/`` package (editable).
    Each step logs its message and then runs its script via ``bash``.
    """
    logging.info('Starting environment setup for Notebook Instance.')
    interpreter = get_executable()
    no_version_check = "export PIP_DISABLE_PIP_VERSION_CHECK=1"
    steps = (
        (
            'Uninstalling Python packages installed with distutils.',
            "\n"
            "# fix to upgrade `docutils` that was installed with `distutils` (hence pip can't uninstall)\n"
            "rm -rf /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/docutils\n"
            "rm -rf /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/docutils-*\n",
        ),
        (
            # This is due to papermill->black causing an inconsistency
            "Removing incompatible package",
            f"\n{no_version_check}\n"
            f"{interpreter} -m pip uninstall -y enum34\n",
        ),
        (
            'Upgrading pip packages.',
            f"\n{no_version_check}\n"
            f"{interpreter} -m pip install --upgrade pyyaml --ignore-installed\n",
        ),
        (
            'Installing pip packages.',
            f"\n{no_version_check}\n"
            f"{interpreter} -m pip install -r {CURRENT_FOLDER}/notebooks/requirements.txt\n"
            f"{interpreter} -m pip install -e {CURRENT_FOLDER}/notebooks/\n",
        ),
    )
    for message, script in steps:
        logging.info(message)
        bash(script)
def env_setup_studio() -> None:
    """Install the local ``notebooks/`` package (editable) for Studio.

    Uses the current interpreter's pip, with pip's version check disabled.
    """
    logging.info('Starting environment setup for Studio.')
    interpreter = get_executable()
    logging.info('Installing local packages.')
    install_script = (
        "\n"
        "export PIP_DISABLE_PIP_VERSION_CHECK=1\n"
        f"{interpreter} -m pip install -e {CURRENT_FOLDER}/notebooks/\n"
    )
    bash(install_script)
    logging.info('Completed environment setup for Studio.')
# Run the environment setup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    env_setup()